qui {
noisily {
	/*************************************************/
	/* Step 1.5. Dependent variable: GDP per capita **/
	/*************************************************/

/*
1.)
To gain panel data on the evolution of GDP per capita, our BASELINE strategy is the following:
	- Collect per capita GDP data from Madison, extend by using WDI-data
	- Drop observations pertaining to the pre-independence period in former Soviet members
	- Estimate a third-order polynomial in either (i) CO2 emissions or (ii) primary energy consumption, together with year and region dummies, to approximate missing data.
	- Use the implied growth rates in these estimated trajectories to maximally extend the Madison/WDI-data forward and backwards.
	- Result = composite index of GDP per capita, expressed in international GK $

2.)
To gain panel data on the evolution of GDP per capita, our ALTERNATIVE strategy is the following:
	- Collect GDP per capita data from various sources
	- Linearly interpolate missing values in all data sources
	- Select a base series, the most complete dataset (in this case: Madison data on GDP per capita in int. GK$)
	- From the additional sources, select the one for which the overlapping GDP per capita paths are most strongly correlated with the Madison data.
	- Use this data to approximate missing values in the base series.
	
		* IF THE MISSING VALUES ARE RELATED TO A COUNTRY ALREADY APPEARING IN MADISON:
		-> Use the evolution in GDP per capita in this selected dataset to complete missings in the Madison data, eg.
			gdpcap[i,t] = (gdpcap_selected[i,t] / gdpcap_selected[i,t-1]) * gdpcap_Madison[i,t-1] if gdpcap[i,t] is missing (forward extension)
			gdpcap[i,t] = (gdpcap_selected[i,t] / gdpcap_selected[i,t+1]) * gdpcap_Madison[i,t+1] if gdpcap[i,t] is missing (backward extension)
			
		* IF THE MISSING VALUES ARE RELATED TO A COUNTRY NOT COVERED BY MADISON:
		-> approximate by using GDP per capita in the US as a benchmark
			gdpcap_it = (gdpcap_selected_it/gdpcap_selected_USt)*gdpcap_Madison_USt if gdpcap_it is missing
			
	- Proceed by using the second most strongly correlated source and fill in as many missing values as possible
	- Result = composite index of GDP per capita, expressed in international GK $
*/
}

		/* 1.5.1. Baseline dataset */
		
		noi di "BASELINE DATASET:"
		noi di ""
		
		/* 1.5.1.1. Load data */

			/* 1.5.1.1.1. gather Madison + WDI data (and the other GDP per capita sources) */
			* The directory is addressed with a forward slash: Stata accepts it on every
			* platform, and the other cd commands in this file already use that form.
			cd ..
			cd "./3. Intermediary results"
			* Each source is matched on year and on its own country identifier
			* (cntrycode_ followed by the source name). Observations found only in the
			* using file are discarded, so the set of country-years in memory is unchanged.
			foreach name in  "Madison2017" "WDI" "barlee" "PWT8" "PWT56" "TED" {
					merge 1:1 year cntrycode_`name' using `name', keepusing(gdpcap_`name')
					destring gdpcap_`name', replace
					drop if _merge == 2
					drop _merge
					}
			* From here on the Madison2017 series is referred to as gdpcap_Madison
			rename gdpcap_Madison2017 gdpcap_Madison
			
			/* 1.5.1.1.2. Label variables indicating their sources */
			label var gdpcap_WDI "GDP per capita (constant 2005 US$, source: WDI)"
			label var gdpcap_Madison "GDP per capita (gdp per capita (1990 Int. GK$), source: Madison)"
			label var gdpcap_barlee "GDP per capita (Real GDP per capita (1985 international prices), source: Barro&Lee)"
			label var gdpcap_PWT8 "GDP per capita (Expenditure-side real GDP at chained PPPs (2005US$), source: PWT 8)"
			label var gdpcap_PWT56 "GDP per capita (Real GDP per capita, source: PWT 5.6)"
			label var gdpcap_TED "GDP per Capita, in 1990 GK$, source: TED)"
			
			
		/* 1.5.1.2. Madison: Clean some post-1945 isolated data points pertaining to former Soviet members */
		* An observation counts as isolated when it lies after 1945 and both the previous
		* and the next year of the same series are missing (eg. pre-independence estimates
		* pertaining to former Soviet members).
		local isolated "year > 1945 & L.gdpcap_Madison == . & F.gdpcap_Madison == ."
		noi di ""
		noi di "Madison isolated GDP per capita data points removed:"
		sort cntrycode year
		* Show the non-missing observations that are about to be blanked ...
		noi list cntry year if gdpcap_Madison != . & `isolated'
		* ... and set them to missing
		noi replace gdpcap_Madison = . if `isolated'
		
		/* 1.5.1.3. Linearly interpolate missing values in all available datasets */
		* For every source this creates:
		*   interpolated_gdpcap_NAME : 0 = value observed in the source, 1 = value filled in by linear interpolation
		*   gdpcap_NAME_raw          : copy of the series as it was before interpolation
		* and overwrites gdpcap_NAME with the interpolated series (interpolation runs within country, over year).
		* The two counters that used to be incremented here were never read and have been removed.
		foreach name in  "Madison" "WDI" "PWT8" "PWT56" "TED" "barlee" {
			gen interpolated_gdpcap_`name' = 0 if gdpcap_`name' != .
			gen gdpcap_`name'_raw = gdpcap_`name' 																	// Keep raw data, to track number of linearly interpolated contributions
			bysort cntry: ipolate gdpcap_`name' year, generate(gdpcap_`name'1)
			replace gdpcap_`name' = gdpcap_`name'1
			drop gdpcap_`name'1
			* Flag is still missing here only for values that did not exist before ipolate
			replace interpolated_gdpcap_`name' = 1 if interpolated_gdpcap_`name' == . & gdpcap_`name' != .
			}
			
		/* 1.5.1.1.2. Use Maddison data as baseline */
		* Baseline starts as the (interpolated) Madison series from the start year onwards; source code 1 = Madison
		gen baseline_gdpcap = gdpcap_Madison if year >= $startyear
		gen source_gdpcap = 1 if baseline_gdpcap != .
		scalar correlation_gdpcap_1 = 1
		noi sum baseline_gdpcap
		
		* Identify interpolated parts
		* The flag must stay missing wherever the baseline is still missing: the later
		* extension steps only fill the flag for observations where it is missing.
		* (The previous form, 0 & baseline_gdpcap != ., evaluated to 0 for every
		* observation, so those later steps never changed anything.)
		gen interpolated_gdpcap = 0 if baseline_gdpcap != .
		replace interpolated_gdpcap = 1 if interpolated_gdpcap_Madison == 1 & baseline_gdpcap != .

		* Identify extrapolated parts
		gen extrapolated_gdpcap = 0  if baseline_gdpcap != .

		* identify polynomial predicted parts
		gen polynomialpredicted_gdpcap = 0  if baseline_gdpcap != .

		/* 1.5.1.1.3. Maximally extend forward and backward by growth rates implied in WDI data */
		* Two stages per dataset:
		*   (a) chain the baseline forward and backward with the year-on-year growth rate of the
		*       dataset, repeating until no further observation is gained (source code 992);
		*   (b) for observations that are still missing, copy the level of the dataset itself
		*       (source code 2).
		* The bookkeeping variables are only filled where they are still missing, so the
		* order of the statements below matters.
		
			* Show correlation with baseline data
			noi di "Correlation between Madison and WDI data"
			noisily corr gdpcap_Madison gdpcap_WDI if independence_years != . & year >= $startyear
			scalar correlation_gdpcap_2 = round(`r(rho)',.01)
			xtset cntrycode year
			
			foreach dataset in "WDI" {
															
				* Extend forward 
				* Each pass fills a missing year from the previous year; the loop stops when a
				* pass adds no observation to the counted sample (i = extended - original = 0)
				local i = 999
				while `i' != 0 {
					sum baseline_gdpcap if independence_years != . & year >= $startyear
					local original = r(N)
					replace baseline_gdpcap = (1+(gdpcap_`dataset'-L.gdpcap_`dataset')/L.gdpcap_`dataset')*L.baseline_gdpcap if baseline_gdpcap == . & year >= $startyear
					sum baseline_gdpcap if independence_years != . & year >= $startyear
					local extended = r(N)
					local i = `extended' - `original'
					}
								
				* Extend backward
				* Same procedure, filling a missing year from the following year
				local i = 999
				while `i' != 0 {
					sum baseline_gdpcap if independence_years != . & year >= $startyear
					local original = r(N)
					replace baseline_gdpcap = (1+(gdpcap_`dataset'-F.gdpcap_`dataset')/F.gdpcap_`dataset')*F.baseline_gdpcap if baseline_gdpcap == . & year >= $startyear
					sum baseline_gdpcap if independence_years != . & year >= $startyear
					local extended = r(N)
					local i = `extended' - `original'
					}

				* Identify source
				* 992 = value obtained by chaining growth rates of the dataset onto the baseline
				replace source_gdpcap = 992 if source_gdpcap == . & baseline_gdpcap != .
		
				* Identify interpolated parts
				replace interpolated_gdpcap = 1  if baseline_gdpcap != . & interpolated_gdpcap_`dataset' == 1 & interpolated_gdpcap == .
							
				* Identify extrapolated parts
				replace extrapolated_gdpcap = 1  if baseline_gdpcap != . & extrapolated_gdpcap == .
				
				* Extend for countries missing in non-overlapping time periods in reference data series
				* (the level of the dataset is copied as is, without rescaling)
				replace baseline_gdpcap = gdpcap_`dataset' if baseline_gdpcap == .  & independence_years != . & year >= $startyear

				* Identify source
				* 2 = level taken directly from the dataset
				replace source_gdpcap = 2 if source_gdpcap == . & baseline_gdpcap != .

				* Identify interpolated parts
				replace interpolated_gdpcap = 1  if baseline_gdpcap != . & interpolated_gdpcap_`dataset' == 1 & interpolated_gdpcap == .
				
				* Identify extrapolated parts
				replace extrapolated_gdpcap = 0 if baseline_gdpcap != . & extrapolated_gdpcap == .
			
				* Identify polynomial predicted parts
				replace polynomialpredicted_gdpcap = 0 if baseline_gdpcap != . & polynomialpredicted_gdpcap == .
				}

			/* 1.5.1.1.4.  Add data on CO2 emissions (co2) and primary energy consumption (pec)  */

				* Add data
				* Forward slash in the path, as in the other cd commands of this file, so the
				* script does not depend on the Windows path separator.
				cd ..
				cd "./3. Intermediary results"
				* Each file is matched on year and its own country identifier; observations that
				* exist only in the using file are dropped after every merge.
				merge 1:1 year cntrycode_CAIT using CAIT, keepusing(co2)
				drop if _merge == 2
				drop _merge
				merge 1:1 year cntrycode_COW_NC using COW_NC, keepusing(pec)
				drop if _merge == 2
				drop _merge
				merge 1:1 year cntrycode_WDI using WDI, keepusing(co2_WDI)
				drop if _merge == 2
				drop _merge
					
			/* 1.5.1.1.5.   Define panel structure, keep relevant variables, clean data  */

				* Clean data
		
					* CAIT: Clean some suspicious data points
					* Non-positive emissions are blanked first (their logarithm is undefined)
					replace co2 = . if co2 <= 0
					replace co2 = . if cntrycode_CAIT == "Ethiopia" & (year == 1941 | year == 1942 | year == 1943 | year == 1948 | year == 1949) // Jumps from .0037 to .0917 in 1950
					replace co2 = . if cntrycode_CAIT == "Madagascar" & (year == 1933 | year == 1934 | year == 1935 | year == 1945 | year == 1947 | year == 1948 | year == 1949) // Jumps from 0.004 to 0.3 in 1950
					replace co2 = . if cntrycode_CAIT == "Mozambique" & year < 1950	// Jumps from 0.055 to 0.905 in 1950
					
					* COW_NC: Clean some suspicious data points
					replace pec = . if pec <= 1
					replace pec = . if cntrycode_COW_NC == 530 & year <= 1969 // jumps from 1-800 to 7609 in 1970
					* NOTE(review): this statement appeared twice, word for word. The repeat had no
					* effect and was removed; confirm that no other country code was intended.
					replace pec = . if cntrycode_COW_NC == 680 & year <= 1967 // jumps from 2 to 7076 in 1968
					replace pec = . if cntrycode_COW_NC == 232 & year <= 1994 // jumps from 1 to 41 in 1995
					
					sort cntry year

					* Define log per capita co2 emissions/pec
					* co2 and pec are divided by pop before taking logs; co2_WDI is logged as it is
					* (presumably already per capita; verify against the WDI extract)
					replace pec = pec/pop
					replace pec = ln(pec) 
					replace co2 = co2/pop
					replace co2 = ln(co2) 
					replace co2_WDI = ln(co2_WDI)
						
				* Panel declaration and log of the baseline series
				xtset cntrycode year
				generate baseline_lgdpcap = ln(baseline_gdpcap)

				* Numeric region identifier (one code per distinct value of region)
				sort region
				egen regioncode = group(region)
				
				* Correlation of each per capita GDP proxy with the log baseline series
				noi di "Correlation with baseline series (Madison + WDI)"
				foreach proxy in co2 co2_WDI pec {
					noisily correlate `proxy' baseline_lgdpcap if independence_years != . & year >= $startyear
					}
				 
			/* 1.5.1.1.6.   Predict missing country-years using other sources */
			* For each proxy (co2_WDI, co2, pec) the log baseline is regressed on a cubic in
			* the proxy plus every variable whose name starts with year_ or region_. Because
			* the interaction terms built below carry those prefixes, they enter the regression
			* as well. Kept per proxy: the prediction appr_NAME (variable) and the scalars
			* appr_NAME_r2, predictive_accuracy_NAME and avg_discrepancy_NAME.
				noi di "Calculate predictions based on:", _continue
				
				foreach dataset in "co2_WDI" "co2" "pec" {
				
					noi di "`dataset'", _continue

					* Generate indicators:
								
						* Squared and cubic term per capita gdp proxy:
						
						gen `dataset'_sq = `dataset'^2
						gen `dataset'_cu = `dataset'^3
								
						* Year dummies
						
						tab year if year >= $startyear, gen(year_)
						
						* tab numbers the dummies 1, 2, ...; rename them to the calendar year.
						* This assumes the years run from the start year up to 2016.
						local tyear = 2016 - $startyear + 1
						forval i = 1/`tyear' {
							local j = `i' + $startyear - 1
							rename year_`i' year_`j'
							}

						* Region dummies (the loops below assume seven regions)

						tab regioncode, gen(region_)

						* Interactions:
									
							* region and year				
						
							forval i = ${startyear}/2016 {
								forval j = 1/7 {
								gen year_`i'Xregion_`j' = year_`i' * region_`j'
								}
							}
										
							* year and per capita gdp proxy
							
							forval i = ${startyear}/2016 {
								gen year_`i'X`dataset' = year_`i' * `dataset'
								gen year_`i'X`dataset'_sq = year_`i' * `dataset' * `dataset'
								}
										
							* region and per capita gdp proxy
							
							forval j = 1/7 {
								gen region_`j'X`dataset' = region_`j' * `dataset'
								gen region_`j'X`dataset'_sq = region_`j' * `dataset' * `dataset'
								}
											
							* year, region and per capita gdp proxy
							
								forval i = ${startyear}/2016 {
									forval j = 1/7 {
									gen year_`i'Xregion_`j'X`dataset' = year_`i' * region_`j' * `dataset'
									}
								}

							* Predictions: 
							* NOTE(review): capture hides any error raised by the regression. If it
							* fails, the statements below would read whatever estimation results are
							* still in memory. Confirm this is intended.
							cap reg baseline_lgdpcap `dataset' `dataset'_sq `dataset'_cu year_* region_* if independence_years != . & year >= $startyear					
							scalar appr_`dataset'_r2 = round(e(r2_a), .0001)
							predict appr_`dataset' if independence_years != . & year >= $startyear
							
							* Band of +/- invttail(df, .005) standard errors of the prediction,
							* i.e. a two-sided 99% interval around the fitted value
							predict appr_`dataset'_se if independence_years != . & year >= $startyear, stdp
							local N = e(df_r)
							gen appr_`dataset'_ub = appr_`dataset' + invttail(`N',.005)*appr_`dataset'_se
							gen appr_`dataset'_lb = appr_`dataset' - invttail(`N',.005)*appr_`dataset'_se
							* predictive accuracy: 1 if the observed value lies inside the band, 0 if outside
							* (the second replace was previously repeated verbatim; the repeat is removed)
							gen predictive_accuracy_`dataset' = .
							replace predictive_accuracy_`dataset' = 1 if baseline_lgdpcap !=. & appr_`dataset' != . & appr_`dataset'_lb <= baseline_lgdpcap & baseline_lgdpcap <= appr_`dataset'_ub
							replace predictive_accuracy_`dataset' = 0 if baseline_lgdpcap !=. & appr_`dataset' != . & appr_`dataset'_lb > baseline_lgdpcap | baseline_lgdpcap !=. & appr_`dataset' != . & baseline_lgdpcap > appr_`dataset'_ub
							sum predictive_accuracy_`dataset' if independence_years != . & year >= $startyear
							scalar predictive_accuracy_`dataset' = r(mean)
							
							* Mean difference between observed and predicted log GDP per capita
							gen discr = baseline_lgdpcap - appr_`dataset' if independence_years != . & year >= $startyear
							sum discr
							scalar avg_discrepancy_`dataset' = r(mean)
							
							* Drop unnecesaries:
							* (only the prediction itself and the scalars survive the iteration)
							
							drop year_* region_* `dataset'_sq `dataset'_cu appr_`dataset'_ub appr_`dataset'_lb appr_`dataset'_se predictive_* discr
							}
					noi di ""

				/* 1.5.1.1.7.  Report diagnostics */

				* correlation_gdpcap_K is meant to describe the source stored as code K in
				* source_gdpcap. The extension step further down assigns 3 = co2_WDI, 4 = co2
				* and 5 = pec, so the proxies are visited in that same order here (the loop
				* previously ran co2, co2_WDI, pec, which swapped scalars 3 and 4).
				noi di "Correlation of predictions with baseline data:"
				local source = 3
				foreach dataset in "co2_WDI" "co2" "pec" {
					noi corr baseline_lgdpcap appr_`dataset' if independence_years != . & year >= $startyear
					scalar correlation_gdpcap_`source' = round(`r(rho)',.01)
					local source = `source'+1
					}

				* The remaining loops only display scalars stored by the prediction step
				noi di "Predictive accuracy:"
				foreach dataset in "co2" "co2_WDI" "pec" {
					noi scalar list predictive_accuracy_`dataset'
					}

				noi di "Average discrepancy between observed and predicted values:"
				foreach dataset in "co2" "co2_WDI" "pec" {
					noi scalar list avg_discrepancy_`dataset'
					}
			
				noi di "Adjusted R squared of regressions"
				foreach dataset in "co2" "co2_WDI" "pec" {
					noi scalar list appr_`dataset'_r2 
					}
							
				noi di "Approximated trajectories: summary statistics"
				noi sum baseline_lgdpcap appr_* if independence_years != . & year >= $startyear
				
				* Figures 2a and 2b are written to the TeX results directory
				cd ..
				cd "./5. TeX results"

				* Figure 2a: standardized log GDP per capita against each standardized proxy.
				* The standardization overwrites the variables, hence preserve/restore.
				preserve
				foreach var in baseline_lgdpcap co2_WDI co2 pec {
					egen `var'_s = std(`var') if year > 1940 & independence_years != . & independence_years >= 0
					replace `var' = `var'_s
					drop `var'_s
					}
					
				* Legend keys are numbered in the order the plots are listed:
				* 1 = pec, 2 = co2 (CAIT), 3 = co2_WDI, 4 = 45-degree line.
				* The labels below follow that numbering (keys 1 and 3 were previously swapped).
				twoway 	(scatter baseline_lgdpcap pec, msymbol(Oh) mcolor(gs0) msize(medium)) ///
						(scatter baseline_lgdpcap co2, msymbol(Oh) mcolor(gs8) msize(medium)) ///
						(scatter baseline_lgdpcap co2_WDI, msymbol(Oh) mcolor(gs12) msize(medium)) ///
						(line co2_WDI co2_WDI, lwidth(thick) lcolor(gs0) lpattern(solid)) ///
						if year > $startyear & independence_years != . ///
						, ytitle("Standardized per capita GDP", size(large)) xtitle("Standardized per capita energy consumption", size(large)) graphregion(color(white)) ylab(,nogrid) /*ylab(0(0.2)0.6, nogrid) xlab(0(0.1)0.4, nogrid)*/ bgcolor(white) legend(ring(0) position(11) rows(4) order(1 "Primary energy consumption (COW)" 2 "CO2 emissions (CAIT)" 3 "CO2 emissions (WDI)" 4 "45°-line"))
						graph export figure2_a.pdf, replace
				restore

				* Figure 2b: observed against predicted log GDP per capita, same key numbering
				* as in figure 2a (1 = pec, 2 = co2, 3 = co2_WDI, 4 = 45-degree line)
				twoway 	(scatter baseline_lgdpcap appr_pec, msymbol(Oh) mcolor(gs0) msize(medium)) ///
						(scatter baseline_lgdpcap appr_co2, msymbol(Oh) mcolor(gs8) msize(medium)) ///
						(scatter baseline_lgdpcap appr_co2_WDI, msymbol(Oh) mcolor(gs12) msize(medium)) ///
						(line appr_co2_WDI appr_co2_WDI, lwidth(thick) lcolor(gs0) lpattern(solid)) ///
						if year >  $startyear & independence_years != . & appr_pec < 11 & appr_pec > 5 ///
						, ytitle("Actual log per capita GDP", size(large)) xtitle("Predicted log per capita GDP", size(large)) graphregion(color(white)) ylab(4(1)12,nogrid) xlab(5(1)11) bgcolor(white) legend(ring(0) position(11) rows(4) order(1 "Primary energy consumption (COW)" 2 "CO2 emissions (CAIT)" 3 "CO2 emissions (WDI)" 4 "45°-line"))
						graph export figure2_b.pdf, replace
				* Back to the intermediary results directory
				cd ..
				cd "./3. Intermediary results"
			
			/* 1.5.1.1.8. Extend baseline data */
			* The log baseline is extended with each predicted series in turn (co2_WDI, co2, pec).
			* Source codes written to source_gdpcap:
			*   993 / 994 / 995 : value chained from the growth of the prediction (co2_WDI / co2 / pec)
			*     3 /   4 /   5 : level of the prediction copied directly       (co2_WDI / co2 / pec)
			* Note that the growth rate is computed on the predicted log series and applied to
			* the log baseline. The bookkeeping variables are only filled where still missing,
			* so the order of the statements matters.
				
				noi di ""
				noi di "Extend baseline data using:", _continue
				xtset cntrycode year
				local source = 2
				foreach dataset in "co2_WDI" "co2" "pec" {
					
					local source = `source'+1
					local source2 = `source'+990
					noi di "`dataset'", _continue
					
					* Extend forward 
					* Repeat until a pass adds no observation to the counted sample
					local i = 999
					while `i' != 0 {
						sum baseline_lgdpcap if independence_years != . & year >= $startyear
						local original = r(N)
						replace baseline_lgdpcap = (1+(appr_`dataset'-L.appr_`dataset')/L.appr_`dataset')*L.baseline_lgdpcap if baseline_lgdpcap == . & year >= $startyear
						sum baseline_lgdpcap if independence_years != . & year >= $startyear
						local extended = r(N)
						local i = `extended' - `original'
						}
					
					* Extend backward
					local i = 999
					while `i' != 0 {
						sum baseline_lgdpcap if independence_years != . & year >= $startyear
						local original = r(N)
						replace baseline_lgdpcap = (1+(appr_`dataset'-F.appr_`dataset')/F.appr_`dataset')*F.baseline_lgdpcap if baseline_lgdpcap == . & year >= $startyear
						sum baseline_lgdpcap if independence_years != . & year >= $startyear
						local extended = r(N)
						local i = `extended' - `original'
						}

					* Identify source
					* (values obtained by chaining growth rates)
					replace source_gdpcap = `source2' if source_gdpcap == . & baseline_lgdpcap != .
					
					* Extend for countries missing in reference data series
					* (the predicted level is used as is)
					replace baseline_lgdpcap = appr_`dataset' if baseline_lgdpcap == . & independence_years != . & year >= $startyear

					* Identify source
					replace source_gdpcap = `source' if source_gdpcap == . & baseline_lgdpcap != .

					* Identify interpolated parts
					replace interpolated_gdpcap = 0  if baseline_lgdpcap != . & interpolated_gdpcap == .
					
					* Identify extrapolated parts
					replace extrapolated_gdpcap = 0 if baseline_lgdpcap != . & extrapolated_gdpcap == .
					
					* Identify polynomial predicted parts
					replace polynomialpredicted_gdpcap = 1 if baseline_lgdpcap != . & polynomialpredicted_gdpcap == .
					}
					
				noi di ""
				
			/* 1.5.1.1.9. Remove suspicious predictions */
			* A non-missing log GDP per capita above 20 or below 0 is treated as suspicious:
			* such observations are listed and then set to missing.
			local suspicious "baseline_lgdpcap > 20 & baseline_lgdpcap != . | baseline_lgdpcap < 0 & baseline_lgdpcap != ."
			noi di ""
			noi di "Suspicious predictions removed:"
			sum cntrycode if `suspicious'
			local n_suspicious = r(N)
			if `n_suspicious' != 0 {
				noi list cntry year baseline_lgdpcap if `suspicious'
				noi di ""
				replace baseline_lgdpcap = . if baseline_lgdpcap > 20 | baseline_lgdpcap < 0
				}
			else {
				noi di ""
				noi di "none"
				noi di ""
				}
			
			/* 1.5.1.1.10. Extend per capita GDP series and linearly interpolate missing values */
			sum baseline_gdpcap baseline_lgdpcap
			* Levels that are still missing are recovered from the extended log series
			replace baseline_gdpcap = exp(baseline_lgdpcap) if baseline_gdpcap == .
			
			* Linear interpolation within country, over year.
			* The temporary raw copy that used to be created here was dropped again without
			* ever being read, so it has been removed.
			foreach var in baseline_gdpcap baseline_lgdpcap {
				bysort cntry: ipolate `var' year, generate(`var'1)
				replace `var' = `var'1
				drop `var'1
				}
			* Values that exist now but have no source code were created by the interpolation above:
			* flag them as interpolated and give them source code 0
			replace interpolated_gdpcap = 1 if source_gdpcap == . & baseline_gdpcap != .
			replace source_gdpcap = 0 if baseline_gdpcap != . & source_gdpcap == .
			
			/* 1.5.1.1.11. Report composition of data series */

				noi di ""
				noi di "Baseline data: summary statistics"
				noi sum gdpcap_Madison baseline_gdpcap baseline_lgdpcap if independence_years != . & year >= $startyear
			
			/* 1.5.1.1.12. Provide numerical example for Ukraine */
			
				* NOTE(review): despite its name, lgdpcap_Madison is the log of the TED series.
				* It is plotted as the alternative pre-independence GDP per capita. The name is
				* kept because the variable stays in the dataset and may be used elsewhere;
				* confirm that TED is the intended source.
				gen lgdpcap_Madison	 = ln(gdpcap_TED)
							
				* Legend keys are numbered in the order the plots are listed:
				* 1 = shaded area, 2 = baseline line, 3 = co2 scatter (second y axis),
				* 4 = appr_co2 scatter, 5 = extended baseline line, 6 = lgdpcap_Madison scatter.
				* The order() list follows that numbering; the labels for keys 3 to 6 were
				* previously attached to the wrong plots.
				twoway 	(scatteri 9.71 1990 9.71 1980, bcolor(gs12) recast(area)) || ///
						(line baseline_lgdpcap year if gdpcap_Madison != . & cntry == "Ukraine" & year >= 1991, lcolor(gs0)) || ///
						(scatter co2 year if cntry == "Ukraine" & baseline_lgdpcap != . , msymbol(oh) mcolor(red) yaxis(2)) || ///
						(scatter appr_co2 year if cntry == "Ukraine" & baseline_lgdpcap != . , msymbol(Th) mcolor(gs8)) || ///
						(line baseline_lgdpcap year if gdpcap_Madison == . & cntry == "Ukraine"  & baseline_lgdpcap != . & year < 2000 | cntry == "Ukraine" & year >= 1990 & year <= 1991, lcolor(gs8) lwidth(thick)) || ///
						(scatter lgdpcap_Madison year if cntry == "Ukraine" & year < 1991, msymbol(X) mcolor(green) msize(large) lwidth(vthick)) ///
						if year >= 1960 & year <= 2010, ylab(none, axis(2)) ytitle("(predicted) log per capita GDP") ytitle("Per capita CO2-emissions", axis(2))  xtitle(Year, size(large)) legend(order(2 "Baseline (Madison, 2017)" 3 "Per capita CO2-emissions (WRI, 2015)" 4 "Polynomial predicted (CO2-emissions)" 5 "Polynomial extended" 6 "Pre-independence per capita GDP (alternative)") size(vsmall)) xline(1991, lcolor(red) lpattern(shortdash)) graphregion(color(white)) bgcolor(white)
				* Export to the TeX results directory, then return to the intermediary results
				* (forward slashes, as in the other cd commands of this file)
				cd ..
				cd "./5. TeX results"
				graph export figure2_c.pdf, replace
				cd ..
				cd "./3. Intermediary results"
		
			/* 1.5.1.1.13. Drop unnecesaries, add relevant data */
			
				* The level series is rebuilt from the final log series ...
				replace baseline_gdpcap = exp(baseline_lgdpcap)
				* ... and the predictions and proxies are no longer needed
				drop appr_*
				drop co2* pec
			
		/* 1.5.2. Alternative dataset */
		noi di ""
		noi di "ALTERNATIVE DATASET:"
		noi di ""
							
			/* 1.5.2.1. Generate dependent variable, use Madison data as base series (most observations) */
			gen alternative_gdpcap = gdpcap_Madison
			
			/* 1.5.2.2. Complete by using other data sources, giving primacy to those datasets strongest correlated with the Madison data */
			* The sources are processed in the fixed order listed in the loop. For each one:
			*   1. store its correlation with the Madison series,
			*   2. chain the alternative series with the growth rate of the source, first
			*      from the previous year (operator L), then from the following year
			*      (operator F), each repeated until no observation is gained,
			*   3. copy the level of the source where the series is still missing.
			* NOTE(review): the header of this file describes a rescaling against US GDP per
			* capita for countries not covered by Madison. Step 3 copies the source level
			* unchanged; confirm which of the two is intended.
			
			xtset cntrycode year
			noi di ""
			noi di "Extend baseline data using:", _continue

			foreach dataset in "TED" "barlee" "PWT56" "WDI" "PWT8" {
				
				noi di "`dataset'", _continue
				
				corr gdpcap_Madison gdpcap_`dataset' if independence_years != . & year >= $startyear
				scalar correlation_Madison_`dataset' = r(rho)
									
				* Extend forward (L), then backward (F)
				foreach shift in L F {
					local gained = 999
					while `gained' != 0 {
						sum alternative_gdpcap  if independence_years != . & year >= $startyear
						local n_before = r(N)
						replace alternative_gdpcap = (1+(gdpcap_`dataset'-`shift'.gdpcap_`dataset')/`shift'.gdpcap_`dataset')*`shift'.alternative_gdpcap if alternative_gdpcap == . & year >= $startyear
						sum alternative_gdpcap  if independence_years != . & year >= $startyear
						local gained = r(N) - `n_before'
						}
					}
						
				* Extend for countries missing in reference data series
				replace alternative_gdpcap = gdpcap_`dataset' if alternative_gdpcap == . & independence_years != . & year >= $startyear
				}

				noi di ""
				noi di ""
				
			/* 1.5.2.3. Correctly signify missings */
			* A value of exactly zero is treated as missing

			replace alternative_gdpcap = . if alternative_gdpcap == 0

			/* 1.5.2.4. Compute logarithm */

			generate alternative_lgdpcap = ln(alternative_gdpcap)

			/* 1.5.2.5. Label variables */
			
			label variable baseline_gdpcap "GDP per capita (Baseline)"
			label variable baseline_lgdpcap "Log GDP per capita (Baseline)"
			label variable alternative_gdpcap "GDP per capita (Alternative)"
			label variable alternative_lgdpcap "Log GDP per capita (Alternative)"

			/* 1.5.2.6. Report modelling implications */

			* Correlations with Madison are computed over all observations; the summary and
			* the baseline/alternative correlation are restricted to the estimation sample
			local sample "independence_years != . & year >= $startyear"
			
			noi di "Correlation with Madison data:"
			foreach dataset in "TED" "barlee" "PWT56" "WDI" "PWT8" {
				noi correlate gdpcap_Madison gdpcap_`dataset'
				}

			noi di "Baseline + alternative data: summary statistics"
			noi summarize baseline_lgdpcap alternative_lgdpcap  if `sample'
			
			noi di ""
			noi di "Correlation between baseline data and alternative data:"
			noi correlate baseline_lgdpcap alternative_lgdpcap  if `sample'

			/* 1.5.2.7. Drop raw data */
			* Removes every source series and its raw copy (all names starting with gdpcap_)
			
			drop gdpcap_*
			
			/* 1.5.2.8. Generate estimates of total GDP in each country & label them */
			* Total GDP = GDP per capita times pop, for the baseline and the alternative series
			foreach series in baseline alternative {
				gen `series'_GDP = `series'_gdpcap * pop
				label var `series'_GDP "GDP (1990 GK $)"
				}
			
			/* 1.5.2.9. Generate estimates of market potential, defined as spatially decayed GDP in the area */
			
				* Load & prepare spatial data
				// Source: http://www.cepii.fr/CEPII/en/bdd_modele/download.asp?id=6
				* The dataset in memory is set aside with preserve and brought back by the
				* restore at the end of this part.
				preserve
				cd ..
				cd "./2. Raw data"
				use GeoDist.dta, clear
				keep iso_o iso_d dist distcap
				
				* Collect bilateral distances between capitals
				* distcap is turned into an inverse distance (the spatial decay weight), then the
				* data are reshaped to one row per origin with one distcap column per destination
				drop dist
				replace distcap = 1/distcap
				reshape wide distcap, i(iso_o) j(iso_d) string
				
				* Rename variables
				duplicates drop iso_o, force
				rename iso_o cntrycode_ISO
				
				* Add data on missing countries, use data on most nearby country
				* For each missing country the row of the proxy country is duplicated and given
				* the code of the missing country, and the distance column of the proxy is copied
				* to a column for the missing country.

					* East Timor = Indonesia
					* (ISO3 code of Indonesia is IDN; the code IND used before is India)
					expand 2 if cntrycode_ISO == "IDN", gen(expanded)
					replace cntrycode_ISO = "74" if expanded == 1
					gen distcap74 = distcapIDN
					drop expanded
					
					* Liechtenstein = Switzerland
					* (ISO3 code of Switzerland is CHE; the code SWZ used before is Swaziland)
					expand 2 if cntrycode_ISO == "CHE", gen(expanded)
					replace cntrycode_ISO = "161" if expanded == 1
					gen distcap161 = distcapCHE
					drop expanded
					
					* Monaco = France
					expand 2 if cntrycode_ISO == "FRA", gen(expanded)
					replace cntrycode_ISO = "185" if expanded == 1
					gen distcap185 = distcapFRA
					drop expanded
					
					* Montenegro = Albania
					expand 2 if cntrycode_ISO == "ALB", gen(expanded)
					replace cntrycode_ISO = "187" if expanded == 1
					gen distcap187 = distcapALB
					drop expanded
					
					* South Korea = Korea
					expand 2 if cntrycode_ISO == "KOR", gen(expanded)
					replace cntrycode_ISO = "259" if expanded == 1
					gen distcap259 = distcapKOR
					drop expanded
					
					* Suriname = Guyana
					expand 2 if cntrycode_ISO == "GUY", gen(expanded)
					replace cntrycode_ISO = "268" if expanded == 1
					gen distcap268 = distcapGUY
					drop expanded
					
					* Serbia = Croatia
					expand 2 if cntrycode_ISO == "HRV", gen(expanded)
					replace cntrycode_ISO = "317" if expanded == 1
					gen distcap317 = distcapHRV
					drop expanded
				
				* Save inter-capital distances
				cd ..
				cd "./3. Intermediary results"
				save intercapital_distances, replace
				restore
				
				* Attach the inverse inter-capital distances to the panel, keep only rows that
				* belong to the panel, and delete the temporary file
				merge m:1 cntrycode_ISO using intercapital_distances
				drop if _merge == 2 | cntry == ""
				drop _merge
				erase intercapital_distances.dta
				
				* Market potential of a country in a given year: the sum, over all other
				* countries with independence_years >= 0, of their baseline GDP weighted by the
				* inverse distance column of that country. Countries without a distance column
				* are skipped (the captured gen fails) and keep a missing value.
				noi di "Compute measure of market potential (/317):"
				gen market_potential = .
				levelsof cntrycode_ISO, local(iso_codes)
				local progress = 1
				foreach iso in `iso_codes' {
					noi di `progress', _continue
					local ++progress
					* Numeric panel identifier that goes with this ISO code
					sum cntrycode if cntrycode_ISO == "`iso'"
					local panel_id = r(mean)
					cap gen GDP_`iso' = baseline_GDP*distcap`iso' if distcap`iso' != 0
					if !_rc {
						forvalues yr = ${startyear}/2016 {
							sum GDP_`iso' if cntrycode != `panel_id' & year == `yr' & independence_years != . & independence_years >= 0
							replace market_potential = r(sum) if year == `yr' & cntrycode == `panel_id'
							}
						}
					cap drop GDP_`iso'
					}
				
				* The distance columns are no longer needed
				drop distcap*
								
* Reroute to directory containing dofiles
* (forward slash, as in the other cd commands of this file, so the path also resolves outside Windows)
cd ..
cd "./1. Dofiles"
}
